Data Screening

First, let’s bring in the data and munge it appropriately.

# Set working directory ----
#setwd("/Users/bfoster/Desktop/2017-edc/science-fairs-analyses")

# Load packages ----
# pacman::p_load() installs any missing package, then attaches it.
# (Duplicated entries for `scales` and `tidyverse` removed.)
if (!require("pacman")) install.packages("pacman")
pacman::p_load(psych, ggplot2, ggalt, ggthemes,
    readr, dplyr, knitr, scales, pander, kableExtra, stringr,
    mirt, ltm, tidyverse, formattable, gridExtra, broom, lavaan)

# Import the data ----
# joined.dat is a pre-built data set saved by an upstream script.
joined.dat <- readRDS(file = "../data/joined.dat")

# Munge data ----
# Keep the student identifier plus the 14 post-administration SEP items.
items <- joined.dat %>%
  dplyr::select(StudentID, s_postSEP_01, s_postSEP_02, s_postSEP_03, 
                s_postSEP_04, s_postSEP_05, s_postSEP_06, 
                s_postSEP_07, s_postSEP_08, s_postSEP_09, 
                s_postSEP_10, s_postSEP_11, s_postSEP_12, 
                s_postSEP_13, s_postSEP_14) %>%
  # rename_() is deprecated; rename_with() is the supported equivalent.
  # NOTE(review): the pattern "s_postInt_" does not occur in the selected
  # column names, so this rename is a no-op (all downstream code uses the
  # full s_postSEP_* names). If stripping the prefix was intended, the
  # pattern should be "s_postSEP_" -- confirm before changing, since the
  # tables and plots below rely on the full names.
  rename_with(~ gsub("s_postInt_", "", .x))

# create dataframe mapping each item name to a reference number (1:14)
item.ref <- tibble(
  Item = colnames(items)[-1],
  Number = 1:14)

Item Descriptives

The syntax below creates the item descriptive statistics using the ltm package, and conducts all necessary munging for printing tables and plots.

# Easy item descriptive statistics from the 'ltm' package.
# chi.squared = TRUE adds pairwise chi-squared tests; B = 1000 sets the
# number of bootstrap resamples.
post.items.descriptives <- descript(items[-1], chi.squared = TRUE, 
                                   B = 1000)

# extract the proportions in each category
# (element 2 of the descript object holds the category proportions)
post.per <- do.call(rbind, lapply((post.items.descriptives[2]), data.frame, 
                                 stringsAsFactors = FALSE)) %>%
  mutate(item = colnames(items)[-1]) %>%
  rename(Wrong = X0, Correct = X1) %>%
  dplyr::select(item, Wrong, Correct)

# convert to long format for plotting (one row per item x category);
# pivot_longer() supersedes gather()
post.per.long <- pivot_longer(post.per, cols = -item,
                              names_to = "cat", values_to = "value") %>%
  arrange(item)

Analysis of missing data

Let’s look at the percent of missing responses for each item. A color bar has been added to the values in the table to compare the relative proportion missing per each item. Nothing looks alarming.

# Extract the percent missing per item from the descript() output
# (element 7 is the missing-data summary) and render it as a table with
# in-cell color bars proportional to the value.
do.call(rbind, lapply((post.items.descriptives[7]), data.frame, 
                                 stringsAsFactors = FALSE)) %>%
  rownames_to_column("Statistic") %>%
  filter(Statistic == "missin.(%)") %>%
  pivot_longer(cols = -Statistic, names_to = "item", values_to = "value") %>%
  dplyr::select(item, value) %>%
  rename(Percent = value, Item = item) %>%
  # `Percent` is already on the 0-100 scale; the original
  # `(Percent/100)*100` was a redundant round trip.
  mutate("Percent Missing" = color_bar("lightgreen")(Percent)) %>%
  dplyr::select(Item, "Percent Missing") %>%
  # Caption corrected: this table shows percent missing, not category
  # utilization (the old caption was copy-pasted from the next table).
  kable(digits = 2, format = "html", caption = "Percent Missing per Item for post-Administration 
        Period", escape = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Category Utilization for post-Administration Period
Item Percent Missing
s_postSEP_01 1.886792
s_postSEP_02 1.886792
s_postSEP_03 3.144654
s_postSEP_04 4.192872
s_postSEP_05 2.306080
s_postSEP_06 7.337526
s_postSEP_07 4.612159
s_postSEP_08 3.354298
s_postSEP_09 3.983229
s_postSEP_10 4.402516
s_postSEP_11 4.612159
s_postSEP_12 4.402516
s_postSEP_13 4.192872
s_postSEP_14 3.563941

Proportion Correct

Let’s look at the table of the proportions correct for each item in order to see if anything looks aberrant.

# Render the proportion wrong/correct per item as a styled HTML table.
prop.tab <- rename(post.per, Item = item)
kable(prop.tab, digits = 3, format = "html",
      caption = "Category Utilization for post-Administration 
        Period") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Category Utilization for post-Administration Period
Item Wrong Correct
s_postSEP_01 0.267 0.733
s_postSEP_02 0.483 0.517
s_postSEP_03 0.400 0.600
s_postSEP_04 0.632 0.368
s_postSEP_05 0.363 0.637
s_postSEP_06 0.308 0.692
s_postSEP_07 0.514 0.486
s_postSEP_08 0.623 0.377
s_postSEP_09 0.570 0.430
s_postSEP_10 0.478 0.522
s_postSEP_11 0.356 0.644
s_postSEP_12 0.765 0.235
s_postSEP_13 0.466 0.534
s_postSEP_14 0.374 0.626

A visualization is provided for another perspective to examine proportion correct. Note that I’ve added a horizontal line at .50 for reference.

# Stacked-bar chart of response-category proportions per item, with a
# horizontal reference line at .50. geom_col() is equivalent to
# geom_bar(stat = "identity").
p_pl1_prop <- ggplot(post.per.long, aes(x = item, y = value, fill = cat)) +
  geom_col() +
  ggtitle("Proportion Correct of SEP Items") +
  labs(x = "Items", y = "Proportion of Response Option", fill = "Category") +
  scale_fill_ptol() +
  theme_minimal() +
  geom_hline(yintercept = .5) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p_pl1_prop

Distribution of total score

A histogram of the total score is provided to examine whether the total score for the measure is normally distributed with no obvious ceiling or floor effects.

# Total raw score per respondent: sum over the 14 items (StudentID column
# dropped). NOTE(review): rowSums() with the default na.rm = FALSE yields
# NA for any respondent with a missing response, so those cases are
# silently excluded from the histogram -- confirm this is intended.
total <- rowSums(items[-1])
# histogram() is lattice's formula interface -- presumably attached via a
# loaded package's dependency (lattice is not in the p_load list); verify.
histogram(~total, breaks=10)

Fit the IRT Models

The syntax below fits both the Rasch model, and the 2 & 3-PL models for dichotomous data. Standard errors are calculated based on the sandwich covariance estimate, which was chosen to adjust for nesting in the data. Results for the best fitting model (i.e., lowest sample adjusted BIC), indicated that the 2-PL model should be utilized in subsequent model testing.

# define the number of cores for quicker processing 
mirtCluster(5)  

# drop rows with any missing item response (listwise deletion)
items.noNA <- na.omit(items)

# run the Rasch model ----
# Unidimensional Rasch (1-PL, common slope). Sandwich SEs are used to
# adjust for nesting in the data; NCYCLES raises the EM iteration cap.
set.seed(8675309)
model_1pl_rasch <- mirt(items.noNA[-1], 1, 
                        itemtype = 'Rasch', 
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)

# check model convergence (printing the object reports convergence status,
# log-likelihood, and information criteria)
model_1pl_rasch 
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "Rasch", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 8000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 19 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: L-BFGS-B 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 7.9627
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3446.151
## Estimated parameters: 15 
## AIC = 6922.302; AICc = 6923.575
## BIC = 6981.909; SABIC = 6934.315
## G2 (16368) = 2321.34, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# 2-PL model: adds a freely estimated discrimination (slope) per item,
# otherwise identical settings to the Rasch fit above.
set.seed(8675309)
model_2pl <- mirt(items.noNA[-1], 1, 
                        itemtype = '2PL', 
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)
# check to see that model converged
model_2pl
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "2PL", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 8000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 16 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 13.18046
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3399.808
## Estimated parameters: 28 
## AIC = 6855.617; AICc = 6860.078
## BIC = 6966.884; SABIC = 6878.04
## G2 (16355) = 2228.66, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# 3-PL model: adds a lower-asymptote (pseudo-guessing) parameter per item.
# (Information matrix was computationally singular; sample is small, so we
# don't worry too much. Note the much larger NCYCLES cap for this model.)
set.seed(8675309)
model_3pl <- mirt(items.noNA[-1], 1, 
                        itemtype = '3PL',
                        technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 80000),
                        SE = TRUE, SE.type = 'sandwich',
                        verbose = FALSE)
# check model convergence
model_3pl
## 
## Call:
## mirt(data = items.noNA[-1], model = 1, itemtype = "3PL", SE = TRUE, 
##     SE.type = "sandwich", verbose = FALSE, technical = list(removeEmptyRows = TRUE, 
##         parallel = TRUE, NCYCLES = 80000))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 17614 EM iterations.
## mirt version: 1.26.3 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## 
## Information matrix estimated with method: sandwich
## Condition number of information matrix = 37974.08
## Second-order test: model is a possible local maximum
## 
## Log-likelihood = -3393.106
## Estimated parameters: 42 
## AIC = 6870.212; AICc = 6880.532
## BIC = 7037.112; SABIC = 6903.847
## G2 (16341) = 2215.25, p = 1
## RMSEA = 0, CFI = NaN, TLI = NaN
# Examine sample-size adjusted BIC to pick the best fitting model
# (lower SABIC = better). @Fit is the mirt fit-statistics slot.
mods.SABIC <- tibble(
  "Rasch" = model_1pl_rasch@Fit$SABIC,
  "2PL" = model_2pl@Fit$SABIC,
  "3PL" = model_3pl@Fit$SABIC) %>%
  # pivot_longer() supersedes gather()
  pivot_longer(everything(), names_to = "key", values_to = "SABIC") %>%
  arrange(SABIC)

# print table, best-fitting model first
mods.SABIC %>%
  kable(digits = 2, format = "html", caption = "Model Selection With SABIC", escape = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Model Selection With SABIC
key SABIC
2PL 6878.04
3PL 6903.85
Rasch 6934.31
# if needed, log-likelihood test can be provided with the command below
#anova(model_1pl_rasch, model_2pl)

IRT coefficients

Inspect the parameter estimates (i.e., the IRT parameterization) for the best fitting model. Again, item 12 has a negative discrimination.

# IRT-parameterized coefficients for the best-fitting (2PL) model.
# NOTE(review): the original call passed `IRTparms = T`, which is not an
# argument of coef() -- the argument is `IRTpars`, and "IRTparms" is not a
# partial match -- so it was silently absorbed by `...`. The table was
# therefore printed in the slope/intercept (a, d) parameterization while
# the intercept column was labeled "difficulty". With IRTpars = TRUE the
# simplified output columns are items.a (discrimination) and items.b
# (difficulty), so the printed values will differ from earlier renders.
as.data.frame(coef(model_2pl, IRTpars = TRUE, simplify = TRUE)) %>%
  rename(discrimination = items.a,
         difficulty = items.b) %>%
  mutate(Item = colnames(items)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format = "html", caption = "Item IRT Parameters", escape = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item IRT Parameters
Item discrimination difficulty
s_postSEP_01 1.11 1.40
s_postSEP_02 0.50 0.08
s_postSEP_03 0.89 0.62
s_postSEP_04 0.69 -0.52
s_postSEP_05 0.84 0.60
s_postSEP_06 0.82 0.94
s_postSEP_07 0.76 -0.04
s_postSEP_08 0.30 -0.49
s_postSEP_09 1.28 -0.34
s_postSEP_10 1.64 0.28
s_postSEP_11 1.00 0.80
s_postSEP_12 -0.19 -1.31
s_postSEP_13 1.39 0.26
s_postSEP_14 0.58 0.66
#mirt:::mirt2traditional(model_2pl)

Category response curves

Next, the category response curves are examined. We’re looking for two things: 1) the location of the item along the ability scale (i.e., difficulty), and 2) how well an item can differentiate among examinees who have abilities above and below the item location (i.e., the discrimination parameter). The steeper the curve, the better it can discriminate. Flatter curves indicate an almost equal probability of getting an item correct at either end of the ability continuum. The plots below indicate several problematic items: 2, 3, 4, 5, 7, 8, and 12. Of these problematic items, item 12 should most certainly be removed from the analyses, as it has a negative discrimination parameter, which yields a monotonically decreasing item response function. This result indicates that people with high ability have a lower probability of responding correctly than people of low ability. The best discriminating items are: 1, 2, 10, 11, and 13, as these all exhibit the steepest curves.

Reference table for CRC plots:

# Reference table mapping item names to the numbers used in the CRC plots.
kable(item.ref, digits = 2, format = "html",
      caption = "Item Labels and Reference Numbers", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Item Labels and Reference Numbers
Item Number
s_postSEP_01 1
s_postSEP_02 2
s_postSEP_03 3
s_postSEP_04 4
s_postSEP_05 5
s_postSEP_06 6
s_postSEP_07 7
s_postSEP_08 8
s_postSEP_09 9
s_postSEP_10 10
s_postSEP_11 11
s_postSEP_12 12
s_postSEP_13 13
s_postSEP_14 14

Generate CRCs for each item:

# Helper: combined category-response-curve and information plot for a
# single item (itemplot accepts the item's column name).
plotCRC <- function(i) {
  itemplot(model_2pl, i, type = "infotrace")
}

# Apply the helper to every item name, yielding one plot per item.
lapply(colnames(items)[-1], plotCRC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

Plot CRCs in same frame so that difficulty of items can be visually examined:

# Plot all 14 trace curves in one panel (facet_items = FALSE) so the
# relative difficulty (horizontal location) of items can be compared.
plot(model_2pl, type = 'trace', which.items = 1:14, facet_items=FALSE)

Model fit

Global Fit: How well do these models fit the data, and do the items appear to behave well given the selected itemtypes? The M2() function is used to assess global model fit. Overall, the model fits the data well.

Criteria for evaluating overall model fit:

  • RMSEA <= .05

  • SRMR <= .05

  • Non-significant M2 statistic

  • TLI and CFI >= .90

# Global fit ----
# M2 limited-information fit statistic; impute = 100 imputations for the
# missing responses, residuals matrix not returned.
M2_model_2pl <- M2(model_2pl, impute = 100, residmat = FALSE)

kable(M2_model_2pl, digits = 3, format = "html",
      caption = "Global Fit Statistics", escape = FALSE) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Global Fit Statistics
M2 df p RMSEA RMSEA_5 RMSEA_95 SRMSR TLI CFI
stats 72.407 77 0.627 0 0 0.025 0.04 1.009 1

Item Fit: The itemfit() function provides a piece-wise assessment of item fit using Orlando and Thissen’s (2000) S_X2 statistic. An alpha of <= .01 is typically used to indicate misfitting items. Results hint at some issues with potentially misfitting items (i.e., 12, 10, 5, and 8), but none of them reach the critical alpha value. Followup analyses could be conducted using itemGAM() to estimate the response curves for these items to hypothesize as to why these items are on the fringe of misfitting.

# Piecewise misfit ---

# S_X2 item fit statistics (Orlando & Thissen, 2000); 100 imputations
# to handle missing responses
itemfit_2pl <- itemfit(model_2pl, impute = 100) 

# apply a false discovery rate correction to the p-values (kept for
# reference; not run)
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to previous work

# sort the item fit statistics by p-value (most misfitting items first)
itemfit_2pl %>%
  slice(1:14) %>% 
  arrange(p.S_X2) %>%
  kable(digits = 2, format="html", caption="Item Fit Statistics", escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item Fit Statistics
item S_X2 df.S_X2 p.S_X2
s_postSEP_12 19.55 10 0.03
s_postSEP_10 13.09 7 0.07
s_postSEP_05 15.22 9 0.09
s_postSEP_08 14.99 9 0.09
s_postSEP_07 12.49 9 0.19
s_postSEP_01 10.02 8 0.26
s_postSEP_03 8.96 9 0.44
s_postSEP_13 6.46 8 0.60
s_postSEP_04 5.38 8 0.72
s_postSEP_11 5.36 8 0.72
s_postSEP_06 6.19 9 0.72
s_postSEP_09 5.02 8 0.76
s_postSEP_02 4.01 9 0.91
s_postSEP_14 4.00 9 0.91
# item GAM ----
# items.na <- na.omit(items)
# model_2pl.na <- mirt(items.na[-1], 1, 
#                         itemtype = '2PL', 
#                         technical = list(removeEmptyRows=TRUE, parallel=TRUE, NCYCLES = 8000), 
#                         SE = TRUE, SE.type = 'sandwich',
#                         verbose = FALSE)
# colnames(items.na)
# Theta <- fscores(model_2pl.na, method = "MAP", full.scores = TRUE)
# IG12 <- itemGAM(items.na[,13], Theta) 
# summary(IG12)
# plot(IG12)

Assumptions

Local dependence: Yen’s index of local dependence Q3 is provided. Q3 is a pairwise index of correlation of the residuals from the IRT model. If some sets of items present a significant level of residual correlation, then those items can be considered as locally dependent (Yen, 1993). Q3 statistics >= .2 are typically flagged as indicating local dependence. The Q3 index indicated some local dependence between items 13 and 10. Recall from the analysis of item fit that items 10 and 13 were potentially problematic. Several items exhibited issues with local dependence: 3, 7, 9, 10, and 13.

Example of the constrained model statement:

  # Example (not executed here) of a constrained model statement for
  # handling local dependence via a residual factor.
  # NOTE(review): the statement references s_preSEP_* items -- presumably
  # carried over from the pre-administration script; confirm before use.
  mod.statement <- '
  THETA = s_preSEP_01, s_preSEP_02, s_preSEP_03, s_preSEP_04, s_preSEP_05, s_preSEP_06, s_preSEP_07,
  s_preSEP_08,   s_preSEP_09, s_preSEP_10, s_preSEP_11, s_preSEP_12, s_preSEP_13, s_preSEP_14
  
  # two items exhibiting local dependence (should these two items not be included in the THETA call above?)
  RESID.THETA = s_preSEP_7, s_preSEP_09,  s_preSEP_10, s_preSEP_13         
  
  # constrain the two slopes to identify the construct for the factor with local dependence
  CONSTRAIN = (s_preSEP_7, s_preSEP_09,  s_preSEP_10, s_preSEP_13 , a2)   
  COV = THETA*THETA #note THETA 1 and THETA 2 are orthogonal
  '
# Yen's Q3 residual-correlation matrix; suppress = .19 prints pairs with
# |Q3| below .19 as NA so only notable residual correlations are shown.
tidy(residuals(model_2pl, type = 'Q3', method = 'ML', suppress = .19))
## Q3 matrix:
##       .rownames s_postSEP_01 s_postSEP_02 s_postSEP_03 s_postSEP_04
## 1  s_postSEP_01            1           NA           NA           NA
## 2  s_postSEP_02           NA            1           NA           NA
## 3  s_postSEP_03           NA           NA            1           NA
## 4  s_postSEP_04           NA           NA           NA            1
## 5  s_postSEP_05           NA           NA           NA           NA
## 6  s_postSEP_06           NA           NA           NA           NA
## 7  s_postSEP_07           NA           NA           NA           NA
## 8  s_postSEP_08           NA           NA           NA           NA
## 9  s_postSEP_09           NA           NA           NA           NA
## 10 s_postSEP_10           NA           NA           NA           NA
## 11 s_postSEP_11           NA           NA           NA           NA
## 12 s_postSEP_12           NA           NA           NA           NA
## 13 s_postSEP_13           NA           NA           NA           NA
## 14 s_postSEP_14           NA           NA           NA           NA
##    s_postSEP_05 s_postSEP_06 s_postSEP_07 s_postSEP_08 s_postSEP_09
## 1            NA           NA           NA           NA           NA
## 2            NA           NA           NA           NA           NA
## 3            NA           NA   -0.2353762           NA   -0.1907443
## 4            NA           NA           NA           NA           NA
## 5             1           NA           NA           NA           NA
## 6            NA            1           NA           NA           NA
## 7            NA           NA    1.0000000           NA           NA
## 8            NA           NA           NA            1           NA
## 9            NA           NA           NA           NA    1.0000000
## 10           NA           NA           NA           NA           NA
## 11           NA           NA           NA           NA           NA
## 12           NA           NA           NA           NA           NA
## 13           NA           NA           NA           NA           NA
## 14           NA           NA           NA           NA           NA
##    s_postSEP_10 s_postSEP_11 s_postSEP_12 s_postSEP_13 s_postSEP_14
## 1            NA           NA           NA           NA           NA
## 2            NA           NA           NA           NA           NA
## 3            NA           NA           NA           NA           NA
## 4            NA           NA           NA           NA           NA
## 5            NA           NA           NA           NA           NA
## 6            NA           NA           NA           NA           NA
## 7            NA           NA           NA           NA           NA
## 8            NA           NA           NA           NA           NA
## 9    -0.2030687           NA           NA           NA           NA
## 10    1.0000000           NA           NA    -0.242288           NA
## 11           NA            1           NA           NA           NA
## 12           NA           NA            1           NA           NA
## 13           NA           NA           NA     1.000000           NA
## 14           NA           NA           NA           NA            1

Unidimensionality: A CFA is carried out to test the assumption that the measure is unidimensional. Fit statistics for the measure look OK, except that the parameter estimate for the factor loading of item 12 is negative. This item was problematic in the IRT analyses above, as it displayed a negative discrimination parameter. It is an obvious candidate to remove from follow-up analyses.

# Unidimensionality check: one-factor CFA with all 14 items loading on a
# single latent factor; std.lv = TRUE fixes the factor variance to 1 so
# every loading is freely estimated.
model.1 <- '
  # measurement model
    factor =~ s_postSEP_01 + s_postSEP_02 + s_postSEP_03 + 
                s_postSEP_04 + s_postSEP_05 + s_postSEP_06 + 
                s_postSEP_07 + s_postSEP_08 + s_postSEP_09 + 
                s_postSEP_10 + s_postSEP_11 + s_postSEP_12 +
                s_postSEP_13 + s_postSEP_14
'
# All 14 item columns (everything except StudentID), declared as ordered
# (categorical) endogenous variables -- same names as the literal vector
# in the original call.
sep.item.names <- colnames(items)[-1]
fit.1 <- cfa(model.1, data = items.noNA, std.lv = TRUE,
             ordered = sep.item.names)
fitMeasures(fit.1)
##                          npar                          fmin 
##                        28.000                         0.072 
##                         chisq                            df 
##                        56.810                        77.000 
##                        pvalue                  chisq.scaled 
##                         0.959                        72.147 
##                     df.scaled                 pvalue.scaled 
##                        77.000                         0.635 
##          chisq.scaling.factor                baseline.chisq 
##                         0.888                       744.184 
##                   baseline.df               baseline.pvalue 
##                        91.000                         0.000 
##         baseline.chisq.scaled            baseline.df.scaled 
##                       614.912                        91.000 
##        baseline.pvalue.scaled baseline.chisq.scaling.factor 
##                         0.000                         1.247 
##                           cfi                           tli 
##                         1.000                         1.037 
##                          nnfi                           rfi 
##                         1.037                         0.910 
##                           nfi                          pnfi 
##                         0.924                         0.782 
##                           ifi                           rni 
##                         1.030                         1.031 
##                    cfi.scaled                    tli.scaled 
##                         1.000                         1.011 
##                    cfi.robust                    tli.robust 
##                            NA                            NA 
##                   nnfi.scaled                   nnfi.robust 
##                         1.011                            NA 
##                    rfi.scaled                    nfi.scaled 
##                         0.861                         0.883 
##                    ifi.scaled                    rni.scaled 
##                         0.883                         1.007 
##                    rni.robust                         rmsea 
##                            NA                         0.000 
##                rmsea.ci.lower                rmsea.ci.upper 
##                         0.000                         0.000 
##                  rmsea.pvalue                  rmsea.scaled 
##                         1.000                         0.000 
##         rmsea.ci.lower.scaled         rmsea.ci.upper.scaled 
##                         0.000                         0.025 
##           rmsea.pvalue.scaled                  rmsea.robust 
##                         1.000                            NA 
##         rmsea.ci.lower.robust         rmsea.ci.upper.robust 
##                         0.000                            NA 
##           rmsea.pvalue.robust                           rmr 
##                            NA                         0.057 
##                    rmr_nomean                          srmr 
##                         0.060                         0.060 
##                  srmr_bentler           srmr_bentler_nomean 
##                         0.057                         0.060 
##                   srmr_bollen            srmr_bollen_nomean 
##                         0.057                         0.060 
##                    srmr_mplus             srmr_mplus_nomean 
##                         0.057                         0.060 
##                         cn_05                         cn_01 
##                       680.556                       751.535 
##                           gfi                          agfi 
##                         0.953                         0.936 
##                          pgfi                           mfi 
##                         0.699                         1.026
summary(fit.1, fit.measures=TRUE)
## lavaan (0.5-23.1097) converged normally after   9 iterations
## 
##   Number of observations                           393
## 
##   Estimator                                       DWLS      Robust
##   Minimum Function Test Statistic               56.810      72.147
##   Degrees of freedom                                77          77
##   P-value (Chi-square)                           0.959       0.635
##   Scaling correction factor                                  0.888
##   Shift parameter                                            8.198
##     for simple second-order correction (Mplus variant)
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic              744.184     614.912
##   Degrees of freedom                                91          91
##   P-value                                        0.000       0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.037       1.011
## 
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent Confidence Interval          0.000  0.000       0.000  0.025
##   P-value RMSEA <= 0.05                          1.000       1.000
## 
##   Robust RMSEA                                                  NA
##   90 Percent Confidence Interval                             0.000     NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.060       0.060
## 
## Weighted Root Mean Square Residual:
## 
##   WRMR                                           0.736       0.736
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Standard Errors                           Robust.sem
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   factor =~                                           
##     s_postSEP_01      0.532    0.066    8.024    0.000
##     s_postSEP_02      0.302    0.073    4.135    0.000
##     s_postSEP_03      0.478    0.067    7.118    0.000
##     s_postSEP_04      0.384    0.073    5.296    0.000
##     s_postSEP_05      0.445    0.066    6.716    0.000
##     s_postSEP_06      0.438    0.068    6.446    0.000
##     s_postSEP_07      0.428    0.067    6.402    0.000
##     s_postSEP_08      0.192    0.076    2.539    0.011
##     s_postSEP_09      0.614    0.062    9.898    0.000
##     s_postSEP_10      0.697    0.056   12.434    0.000
##     s_postSEP_11      0.509    0.066    7.739    0.000
##     s_postSEP_12     -0.116    0.085   -1.364    0.173
##     s_postSEP_13      0.640    0.060   10.584    0.000
##     s_postSEP_14      0.331    0.072    4.578    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .s_postSEP_01      0.000                           
##    .s_postSEP_02      0.000                           
##    .s_postSEP_03      0.000                           
##    .s_postSEP_04      0.000                           
##    .s_postSEP_05      0.000                           
##    .s_postSEP_06      0.000                           
##    .s_postSEP_07      0.000                           
##    .s_postSEP_08      0.000                           
##    .s_postSEP_09      0.000                           
##    .s_postSEP_10      0.000                           
##    .s_postSEP_11      0.000                           
##    .s_postSEP_12      0.000                           
##    .s_postSEP_13      0.000                           
##    .s_postSEP_14      0.000                           
##     factor            0.000                           
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     s_pstSEP_01|t1   -0.693    0.069  -10.017    0.000
##     s_pstSEP_02|t1   -0.048    0.063   -0.756    0.450
##     s_pstSEP_03|t1   -0.328    0.065   -5.079    0.000
##     s_pstSEP_04|t1    0.294    0.064    4.578    0.000
##     s_pstSEP_05|t1   -0.321    0.065   -4.979    0.000
##     s_pstSEP_06|t1   -0.509    0.066   -7.670    0.000
##     s_pstSEP_07|t1    0.022    0.063    0.353    0.724
##     s_pstSEP_08|t1    0.301    0.064    4.678    0.000
##     s_pstSEP_09|t1    0.163    0.064    2.568    0.010
##     s_pstSEP_10|t1   -0.112    0.063   -1.763    0.078
##     s_pstSEP_11|t1   -0.410    0.065   -6.279    0.000
##     s_pstSEP_12|t1    0.794    0.071   11.160    0.000
##     s_pstSEP_13|t1   -0.112    0.063   -1.763    0.078
##     s_pstSEP_14|t1   -0.382    0.065   -5.879    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .s_postSEP_01      0.717                           
##    .s_postSEP_02      0.909                           
##    .s_postSEP_03      0.772                           
##    .s_postSEP_04      0.852                           
##    .s_postSEP_05      0.802                           
##    .s_postSEP_06      0.808                           
##    .s_postSEP_07      0.817                           
##    .s_postSEP_08      0.963                           
##    .s_postSEP_09      0.623                           
##    .s_postSEP_10      0.514                           
##    .s_postSEP_11      0.741                           
##    .s_postSEP_12      0.987                           
##    .s_postSEP_13      0.590                           
##    .s_postSEP_14      0.890                           
##     factor            1.000                           
## 
## Scales y*:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     s_postSEP_01      1.000                           
##     s_postSEP_02      1.000                           
##     s_postSEP_03      1.000                           
##     s_postSEP_04      1.000                           
##     s_postSEP_05      1.000                           
##     s_postSEP_06      1.000                           
##     s_postSEP_07      1.000                           
##     s_postSEP_08      1.000                           
##     s_postSEP_09      1.000                           
##     s_postSEP_10      1.000                           
##     s_postSEP_11      1.000                           
##     s_postSEP_12      1.000                           
##     s_postSEP_13      1.000                           
##     s_postSEP_14      1.000

Results testing a 1-factor solution for the measure, with item 12 removed, also look good.

# dropping item 12 (negative discrimination / negative factor loading)
model.2 <- '
  # measurement model
    factor =~ s_postSEP_01 + s_postSEP_02 + s_postSEP_03 + 
                s_postSEP_04 + s_postSEP_05 + s_postSEP_06 + 
                s_postSEP_07 + s_postSEP_08 + s_postSEP_09 + 
                s_postSEP_10 + s_postSEP_11 + 
                s_postSEP_13 + s_postSEP_14
'
# Fix: s_postSEP_12 removed from the `ordered` declaration as well -- the
# original call still listed it even though it is no longer in the model.
fit.2 <- cfa(model.2, data = items.noNA, std.lv = TRUE,
              ordered = c("s_postSEP_01", "s_postSEP_02", "s_postSEP_03", 
                "s_postSEP_04", "s_postSEP_05", "s_postSEP_06", 
                "s_postSEP_07", "s_postSEP_08", "s_postSEP_09", 
                "s_postSEP_10", "s_postSEP_11", 
                "s_postSEP_13", "s_postSEP_14"))
fitMeasures(fit.2)
##                          npar                          fmin 
##                        26.000                         0.059 
##                         chisq                            df 
##                        46.142                        65.000 
##                        pvalue                  chisq.scaled 
##                         0.963                        59.472 
##                     df.scaled                 pvalue.scaled 
##                        65.000                         0.670 
##          chisq.scaling.factor                baseline.chisq 
##                         0.861                       728.599 
##                   baseline.df               baseline.pvalue 
##                        78.000                         0.000 
##         baseline.chisq.scaled            baseline.df.scaled 
##                       603.652                        78.000 
##        baseline.pvalue.scaled baseline.chisq.scaling.factor 
##                         0.000                         1.238 
##                           cfi                           tli 
##                         1.000                         1.035 
##                          nnfi                           rfi 
##                         1.035                         0.924 
##                           nfi                          pnfi 
##                         0.937                         0.781 
##                           ifi                           rni 
##                         1.028                         1.029 
##                    cfi.scaled                    tli.scaled 
##                         1.000                         1.013 
##                    cfi.robust                    tli.robust 
##                            NA                            NA 
##                   nnfi.scaled                   nnfi.robust 
##                         1.013                            NA 
##                    rfi.scaled                    nfi.scaled 
##                         0.882                         0.901 
##                    ifi.scaled                    rni.scaled 
##                         0.901                         1.008 
##                    rni.robust                         rmsea 
##                            NA                         0.000 
##                rmsea.ci.lower                rmsea.ci.upper 
##                         0.000                         0.000 
##                  rmsea.pvalue                  rmsea.scaled 
##                         1.000                         0.000 
##         rmsea.ci.lower.scaled         rmsea.ci.upper.scaled 
##                         0.000                         0.025 
##           rmsea.pvalue.scaled                  rmsea.robust 
##                         1.000                            NA 
##         rmsea.ci.lower.robust         rmsea.ci.upper.robust 
##                         0.000                            NA 
##           rmsea.pvalue.robust                           rmr 
##                            NA                         0.053 
##                    rmr_nomean                          srmr 
##                         0.057                         0.057 
##                  srmr_bentler           srmr_bentler_nomean 
##                         0.053                         0.057 
##                   srmr_bollen            srmr_bollen_nomean 
##                         0.053                         0.057 
##                    srmr_mplus             srmr_mplus_nomean 
##                         0.053                         0.057 
##                         cn_05                         cn_01 
##                       721.600                       803.170 
##                           gfi                          agfi 
##                         0.957                         0.940 
##                          pgfi                           mfi 
##                         0.683                         1.024

Follow-up IRT removing item 12

# 2-PL model ----
# Refit the unidimensional 2-PL IRT model on the 13 remaining items
# (item 12 excluded), keeping the StudentID column for later merging.
set.seed(8675309)

# explicit list of columns to retain (all items except s_postSEP_12)
keep.cols <- c("StudentID",
               "s_postSEP_01", "s_postSEP_02", "s_postSEP_03", "s_postSEP_04",
               "s_postSEP_05", "s_postSEP_06", "s_postSEP_07", "s_postSEP_08",
               "s_postSEP_09", "s_postSEP_10", "s_postSEP_11", "s_postSEP_13",
               "s_postSEP_14")
items.noNA_no12 <- dplyr::select(items.noNA, dplyr::all_of(keep.cols))

# sandwich standard errors; generous EM cycle limit for convergence
model_2pl_b <- mirt(items.noNA_no12[-1], 1,
                    itemtype = '2PL',
                    SE = TRUE, SE.type = 'sandwich',
                    technical = list(removeEmptyRows = TRUE,
                                     parallel = TRUE,
                                     NCYCLES = 8000),
                    verbose = FALSE)

# print the fitted object to confirm the model converged
model_2pl_b

Call: mirt(data = items.noNA_no12[-1], model = 1, itemtype = “2PL”, SE = TRUE, SE.type = “sandwich”, verbose = FALSE, technical = list(removeEmptyRows = TRUE, parallel = TRUE, NCYCLES = 8000))

Full-information item factor analysis with 1 factor(s). Converged within 1e-04 tolerance after 15 EM iterations. mirt version: 1.26.3 M-step optimizer: BFGS EM acceleration: Ramsay Number of rectangular quadrature: 61

Information matrix estimated with method: sandwich Condition number of information matrix = 12.01045 Second-order test: model is a possible local maximum

Log-likelihood = -3196.725 Estimated parameters: 26 AIC = 6445.451; AICc = 6449.287 BIC = 6548.77; SABIC = 6466.272 G2 (8165) = 1861.41, p = 1 RMSEA = 0, CFI = NaN, TLI = NaN

# item parameter statistics ---- 
# BUG FIX: the argument was misspelled `IRTparms`, which does not
# partially match `IRTpars`, falls into `...`, and is silently ignored.
# The table therefore showed the slope/intercept parameterization (a, d)
# while labeling the intercept d as "difficulty". With `IRTpars = TRUE`,
# coef() returns the classical IRT parameters (a = discrimination,
# b = difficulty, where b = -d/a).
as.data.frame(coef(model_2pl_b, IRTpars = TRUE, simplify = TRUE)) %>%
  rename(discrimination = items.a,
         difficulty = items.b) %>%
  mutate(Item = colnames(items.noNA_no12)[-1]) %>%
  dplyr::select(Item, discrimination, difficulty) %>%
  #arrange(-discrimination) %>%
  kable(digits = 2, format = "html", caption = "Item IRT Parameters",
        escape = FALSE) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed")) 
Item IRT Parameters
Item discrimination difficulty
s_postSEP_01 1.12 1.40
s_postSEP_02 0.50 0.08
s_postSEP_03 0.88 0.62
s_postSEP_04 0.69 -0.52
s_postSEP_05 0.85 0.60
s_postSEP_06 0.82 0.94
s_postSEP_07 0.76 -0.04
s_postSEP_08 0.30 -0.49
s_postSEP_09 1.30 -0.34
s_postSEP_10 1.63 0.28
s_postSEP_11 0.99 0.80
s_postSEP_13 1.39 0.25
s_postSEP_14 0.58 0.66
# item fit statistics ----
# S-X2 item fit for the 13-item model, with 100 imputations for
# missing responses
itemfit_2pl_b <- itemfit(model_2pl_b, impute = 100)

# (optional) false-discovery-rate adjustment of the p-values
# p.fdr <- p.adjust(itemfit_2pl$p.S_X2,"BH")
# itemfit_2pl <- cbind(itemfit_2pl, p.fdr) # bind to previous work

# table of item fit, ordered from worst- to best-fitting item
itemfit_2pl_b %>%
  slice(1:13) %>%
  arrange(p.S_X2) %>%
  kable(digits = 2, format = "html", caption = "Item Fit Statistics",
        escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Item Fit Statistics
item S_X2 df.S_X2 p.S_X2
s_postSEP_07 14.71 9 0.10
s_postSEP_11 11.28 8 0.19
s_postSEP_05 12.43 9 0.19
s_postSEP_10 9.75 7 0.20
s_postSEP_03 11.26 9 0.26
s_postSEP_09 8.33 8 0.40
s_postSEP_04 9.30 9 0.41
s_postSEP_13 7.44 8 0.49
s_postSEP_06 7.29 8 0.51
s_postSEP_08 7.15 9 0.62
s_postSEP_02 7.03 9 0.63
s_postSEP_01 5.98 8 0.65
s_postSEP_14 5.24 9 0.81
# plot all 13 item characteristic (trace) curves in one lattice panel ----
plot(model_2pl_b, type = 'trace', which.items = 1:13, facet_items=FALSE)

Test Information

Examine information, SEM, and reliability for the whole measure. The plots below show that information maximizes around an average ability level (i.e., in the range of -2 to + 2), standard errors are lower in this range, and reliability is maximized.

# examine test information ----
# table of test information, SEM, and marginal reliability evaluated
# across the theta range [-6, 6]
info_model_2plb <- tibble(theta = seq(-6, 6, .01)) %>%
  mutate(information = testinfo(model_2pl_b, theta),
         error = 1 / sqrt(information),
         reliability = information / (information + 1))

# test information curve
plot(model_2pl_b, type = 'info', MI = 1000)

# standard error of measurement curve
plot(model_2pl_b, type = 'SE', MI = 1000)

# conditional reliability (rxx) across theta
plot(model_2pl_b, type = 'rxx', MI = 1000)

Information Curves

Next, the information curves for each item are examined, looking for overlap in category curves, as well as plateaus in the information curves. Results indicate that most items provide information about students with average ability. Ideally, these information curves should show peaks that are more spread out across the range of the underlying continuum. For example, only items 2, 4, 7, and 8 provide information for students in the above-average portion of the latent continuum, and they do so over approximately the same range. This could indicate a lot of redundancy in the item pool. Finally, the scale of the y-axis should be considered in interpreting these plots.

# reference table mapping item labels to their plot numbers
kable(item.ref, digits = 2, format = "html",
      caption = "Item Labels and Reference Numbers", escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Item Labels and Reference Numbers
Item Number
s_postSEP_01 1
s_postSEP_02 2
s_postSEP_03 3
s_postSEP_04 4
s_postSEP_05 5
s_postSEP_06 6
s_postSEP_07 7
s_postSEP_08 8
s_postSEP_09 9
s_postSEP_10 10
s_postSEP_11 11
s_postSEP_12 12
s_postSEP_13 13
s_postSEP_14 14

Plot the information curves in one grid for easy comparison.

plot(model_2pl, type = 'infotrace', which.items = 1:14, facet_items=FALSE)

Factor scores vs Standardized total scores

The plots below show the association between the standardized raw scores for the measure and the IRT-generated MAP scores.

# Factor scores vs standardized total scores ----
# MAP (posterior mode) factor scores from the 13-item model
#fs.map <- as.vector(
Theta_postSep <- fscores(model_2pl_b, method = "MAP", full.scores = TRUE)

# raw total score per student, and its z-standardized version
TOTAL_postSep <- apply((items.noNA_no12)[-1], 1, sum)
STS_postSep <- as.vector(scale(TOTAL_postSep))

# assemble scores alongside StudentID; fscores() names its score
# column "F1", so rename it to something meaningful
post.sep.theta <- cbind(
  items.noNA_no12[1], Theta_postSep, TOTAL_postSep, STS_postSep
) %>%
  rename(Theta_postSep = F1)

# persist the scores for downstream analyses
write_csv(post.sep.theta, "../data/post.sep.theta.csv")

# IRT MAP scores vs. standardized scores
# BUG FIX: the x aesthetic previously mapped the raw totals
# (TOTAL_postSep) while the title and axis label both described
# *standardized* scores; map the standardized totals (STS_postSep)
# so the plot matches its labels and the surrounding text.
p1.map <- ggplot(post.sep.theta, aes(x = STS_postSep, y = Theta_postSep)) + 
  geom_point() +
  geom_smooth() + 
  theme_minimal() + 
  ggtitle("IRT scores vs. Standardized Scores") + 
  labs(y = "MAP IRT Score", x = "Standardized Scores")

p1.map

# histogram of theta
# FIX: use the bare column name inside aes(); `data$col` in aes() is a
# ggplot2 anti-pattern that breaks faceting and layer-specific data.
ggplot(post.sep.theta, aes(x = Theta_postSep)) + geom_histogram(bins = 25)

# potential outlier: 1256121